/*
This file gathers the raw data from the different register sources. Please see
the Readme file on the DataVerse for information on how to apply for access to
the raw data.
*/

// Setup twin dataset
// Start from an empty session, select the s1mono graph scheme and make the
// project folder the working directory. File names given without a folder
// further down (temp, conv60.dta, iqdata.dta) resolve against this directory.
clear all
set scheme s1mono
cd "C:\Userdata\Shared\Dofiles\PrelDoFiles\PrelRafael\Quantifying bias"


*************
* FUNCTIONS *
*************

* getdata: add one variable from an external dataset to the data in memory
* Arguments:
*   1. Path of the Stata dataset to read (it must contain LopNr)
*   2. Name of the variable to fetch
*   3. (optional) New name for the fetched variable
* Side effects: overwrites the file temp in the working directory; on exit it
* holds the same data as memory. Rows without a twin pair ID are dropped.
capture program drop getdata
program define getdata

	args source varname newname

	* Park the data in memory on disk, then load only the key and the variable.
	* The path is quoted so that file names containing spaces also work.
	quietly save temp, replace
	quietly use "`source'"
	quietly keep LopNr `varname'

	* Keep one row per LopNr (which duplicate survives is not controlled)
	quietly bysort LopNr: keep if _n == 1

	* Bring the parked data back. Rows found only in the source file have no
	* twin pair ID and are discarded.
	quietly merge 1:1 LopNr using temp, nogen
	quietly keep if LopNrParID != .

	* Optional rename of the fetched variable
	if "`newname'" != "" {
		rename `varname' `newname'
	}

	* Keep the parked copy in sync with memory
	quietly save temp, replace

end

* fatherdata: add one variable describing each twin's father
* Arguments:
*   1. Path of the Stata dataset to read (it must contain LopNr)
*   2. Name of the variable to fetch; it is stored as father_<name>
* The source file's LopNr is matched against LopNrFar in the data in memory.
* Side effects: overwrites the file temp in the working directory; on exit it
* holds the same data as memory. Rows without a twin pair ID are dropped.
capture program drop fatherdata
program define fatherdata

	args source varname

	* Park the data in memory on disk, then load only the key and the variable.
	* The path is quoted so that file names containing spaces also work.
	quietly save temp, replace
	quietly use "`source'"
	quietly keep LopNr `varname'
	quietly rename LopNr LopNrFar
	quietly rename `varname' father_`varname'

	* Keep one row per father (which duplicate survives is not controlled)
	quietly bysort LopNrFar: keep if _n == 1

	* One father row can match several twins, hence 1:m. Rows found only in
	* the source file have no twin pair ID and are discarded.
	quietly merge 1:m LopNrFar using temp, nogen
	quietly keep if LopNrParID != .

	* Keep the parked copy in sync with memory
	quietly save temp, replace

end

* motherdata: add one variable describing each twin's mother
* Arguments:
*   1. Path of the Stata dataset to read (it must contain LopNr)
*   2. Name of the variable to fetch; it is stored as mother_<name>
* The source file's LopNr is matched against LopNrMor in the data in memory.
* Side effects: overwrites the file temp in the working directory; on exit it
* holds the same data as memory. Rows without a twin pair ID are dropped.
capture program drop motherdata
program define motherdata

	args source varname

	* Park the data in memory on disk, then load only the key and the variable.
	* The path is quoted so that file names containing spaces also work.
	quietly save temp, replace
	quietly use "`source'"
	quietly keep LopNr `varname'
	quietly rename LopNr LopNrMor
	quietly rename `varname' mother_`varname'

	* Keep one row per mother (which duplicate survives is not controlled)
	quietly bysort LopNrMor: keep if _n == 1

	* One mother row can match several twins, hence 1:m. Rows found only in
	* the source file have no twin pair ID and are discarded.
	quietly merge 1:m LopNrMor using temp, nogen
	quietly keep if LopNrParID != .

	* Keep the parked copy in sync with memory
	quietly save temp, replace

end





* ***********************
* Setup twin data frame *
* ***********************

* Load the cohort file from the Swedish Twin Register
use "D:\STR_RiskScores\Stata_new\STR\cohort_combined.dta", clear
quietly label variable LopNrParID "Twin pair ID"
quietly label variable LopNr "ID twin"

* Coefficient of relatedness: 1 when BESTZYG is 1, 0.5 when BESTZYG is 2 or 4.
* Twins with any other BESTZYG value get no coefficient and are dropped.
quietly generate Relatedness = cond(BESTZYG == 1, 1, cond(inlist(BESTZYG, 2, 4), 0.5, .))
quietly label variable Relatedness "Coefficient of relatedness in twin pair"
quietly drop if Relatedness == .
* zyg is 1 when Relatedness is 1 and 2 when Relatedness is 0.5
quietly generate zyg = 3 - (2 * Relatedness)

* Recode sex from 1-2 to 0-1 and use the name Sex
quietly rename SEX Sex
quietly replace Sex = Sex - 1
quietly label variable Sex "Sex of twin"

* Birth year: Byear is converted to numeric and its last two digits are cut
* off (presumably it holds year and month as YYYYMM - confirm against the STR
* documentation)
destring Byear, replace
quietly generate BirthYear = floor(Byear / 100)
quietly label variable BirthYear "Birth year of twin"
quietly drop Byear

* Running number within the twin pair, ordered by LopNr
quietly bysort LopNrParID (LopNr): generate TwinNr = _n
quietly label variable TwinNr "Twin nr (first used)"

* Add parents' ID numbers from the multigeneration register, from Statistics
* Sweden. Rows that exist only in the register file come in without a twin
* pair ID; the next getdata call removes them.
qui: merge 1:1 LopNr using D:\STR_RiskScores\Stata_new\FlerGen\Foraldrar.dta, nogen

* Give both twins of a pair the same parent IDs. The parent ID is listed in
* parentheses so the within-pair order is set by this command itself: missing
* sorts last, so row 1 of each pair holds the smallest non-missing ID.
qui: bysort LopNrParID (LopNrFar): replace LopNrFar=LopNrFar[1]
qui: bysort LopNrParID (LopNrMor): replace LopNrMor=LopNrMor[1]
	
* Birth municipality from the 1960 census (Statistics Sweden). The 1960 code
* is fetched as kod60 and translated to its 2005 equivalent (kod05) through
* the conversion table conv60.dta.
getdata "D:\STR_RiskScores\Stata_new\FoB\FoB60.dta" Kommun kod60
merge m:1 kod60 using "conv60.dta", nogen
* Conversion-table rows that matched no person carry no LopNr
keep if LopNr != .
rename kod60 old_BirthMunicipality
rename kod05 modern_BirthMunicipality

* Age in 2010, the year the outcomes are measured
generate age = 2010 - BirthYear

* Put the identifying variables first
quietly order BirthYear LopNrParID zyg LopNr TwinNr LopNrFar LopNrMor Sex age


* Add all outcomes from the SALTY survey, from STR.
* Item k of the ATTITYD33 battery is fetched as ATTITYD33_k and stored under
* the k-th name of the list below; answer code 9 is recoded to missing.
* Driving the renames from one ordered list keeps each item number tied to
* its name (items 22 and 33 must come from ATTITYD33_22 and ATTITYD33_33, not
* from ATTITYD33_2 and ATTITYD33_3).
local salty "D:\STR_RiskScores\Stata_new\STRSalty\slty_map.dta"
local attitude_names ///
	decrease_public_sector ///
	decrease_defense_spending ///
	decrease_welfare ///
	lower_taxes ///
	keep_property_taxes ///
	sell_public_enterprise ///
	decrease_economic_inequality ///
	more_private_healthcare ///
	decrease_finmarket_impact ///
	keep_maxtaxa ///
	more_freeschools ///
	earlier_grades ///
	more_support_countryside ///
	six_hour_workday ///
	ban_pornography ///
	limit_abortion ///
	harder_punishment ///
	better_animal_protection ///
	no_nuclear_power ///
	no_cars_in_cities ///
	decrease_pollution ///
	less_carbondioxide ///
	more_skilled_immigration ///
	language_test_citizenship ///
	decrease_aid ///
	fewer_refugees ///
	more_support_immigrant_culture ///
	abolish_debt ///
	more_freedom_companies ///
	leave_eu ///
	instate_euro ///
	join_nato ///
	more_free_trade ///
	support_war_on_terror

* Guard against a name being added or removed without noticing
assert `: word count `attitude_names'' == 34

local k = 0
foreach name of local attitude_names {
	local ++k
	* The third argument makes getdata rename the item on arrival
	getdata "`salty'" ATTITYD33_`k' `name'
	replace `name' = . if `name' == 9
}



* AMBI measure from the SALTY survey, from STR.
* The 16 PERSONLIGHET1 items are fetched as tp1-tp16 with answer code 9 set
* to missing, combined with the signs below, and rescaled as (8-AMBI)/32.
forvalues item = 1/16 {
	getdata "D:\STR_RiskScores\Stata_new\STRSalty\slty_map.dta" PERSONLIGHET1_`item' tp`item'
	replace tp`item' = . if tp`item' == 9
}
generate AMBI = 0-tp1-tp2-tp3+tp4-tp5+tp6-tp7-tp8+tp9-tp10-tp11-tp12+tp13-tp14+tp15+tp16
generate extraversion_SALTY = (8-AMBI)/32
* Remove the item-level helpers (every variable whose name starts with tp)
drop tp*


* Locus of control from the SALTY survey, from STR.
* The 12 PERSONLIGHET2 items are fetched as tp1-tp12 with answer code 9 set
* to missing and combined with the signs below. LOC_SALTY rescales the sum to
* run from 0 (sample minimum) to 1 (sample maximum).
* NOTE(review): tp1-tp12 and the raw LOC stay in the dataset after this step.
forvalues item = 1/12 {
	getdata "D:\STR_RiskScores\Stata_new\STRSalty\slty_map.dta" PERSONLIGHET2_`item' tp`item'
	replace tp`item' = . if tp`item' == 9
}
generate LOC = tp1-tp2-tp3-tp4+tp5+tp6+tp7-tp8-tp9-tp10-tp11-tp12
summarize LOC
generate LOC_SALTY = (LOC-r(min))/(r(max)-r(min))




* Wealth register variables, one set per year 1999-2007.
* FNETTMV, FREALMV, FSUMMV and FFINMV are stored as net_assets, real_assets,
* assets and fin_assets with the year appended; debt is assets minus
* net_assets.
forvalues year = 1999/2007 {
	local wealthfile "D:\STR_RiskScores\Stata_new\Formog\Formogenhet`year'.dta"
	getdata "`wealthfile'" FNETTMV net_assets`year'
	getdata "`wealthfile'" FREALMV real_assets`year'
	getdata "`wealthfile'" FSUMMV assets`year'
	getdata "`wealthfile'" FFINMV fin_assets`year'
	generate debt`year' = assets`year' - net_assets`year'
}



* Income variables from the LISA databases, from Statistics Sweden.
* ForvErs is fetched for each year 1999-2009 and stored as income<year>.
forvalues year = 1999/2009 {
	getdata "D:\STR_RiskScores\Stata_new\LISA\LISA_`year'.dta" ForvErs income`year'
}

* Sum of the ten years 2000-2009, divided by 100 (income1999 is loaded above
* but is not part of this sum). The sum is missing if any single year is.
generate income10 = (income2000+income2001+income2002+income2003+income2004+income2005+income2006+income2007+income2008+income2009)/100

* winsor2 (user-written command) with the trim option writes the trimmed
* copy income10_tr, which is then rescaled to run from 0 to 1
winsor2 income10, cuts(0 99) trim
summarize income10_tr
replace income10_tr = (income10_tr-r(min))/(r(max)-r(min))



* Occupation codes in 2009 from the LISA database, from Statistics Sweden
* Ssyk3 arrives as a string; the placeholder "***" is replaced by "0" so the
* variable can be converted to numeric. occupation drops the last digit of
* the code, so "***" ends up as occupation 0.
getdata D:\STR_RiskScores\Stata_new\LISA\LISA_2009 Ssyk3
replace Ssyk3="0" if Ssyk3=="***"
destring Ssyk3, replace
gen occupation=floor(Ssyk3/10)




* Get altruism variables from the SALTY survey, from STR
* Fetches ATTITYD16-ATTITYD20, sets answer code 9 to missing for items 16, 19
* and 20, and adds up four of the items after shifting each to start at 0.
* NOTE(review): ATTITYD17 is fetched but does not enter the index - confirm
* that this is intended.
* NOTE(review): ATTITYD18 enters the index but, unlike 16, 19 and 20, its
* code 9 is not set to missing - confirm against the questionnaire.
* NOTE(review): only the last two terms are divided (by 6 and by 5); the
* first two enter unscaled - confirm the intended weighting.
getdata D:\STR_RiskScores\Stata_new\STRSalty\slty_map.dta ATTITYD16
getdata D:\STR_RiskScores\Stata_new\STRSalty\slty_map.dta ATTITYD17
getdata D:\STR_RiskScores\Stata_new\STRSalty\slty_map.dta ATTITYD18
getdata D:\STR_RiskScores\Stata_new\STRSalty\slty_map.dta ATTITYD19
getdata D:\STR_RiskScores\Stata_new\STRSalty\slty_map.dta ATTITYD20
replace ATTITYD16=. if ATTITYD16==9
replace ATTITYD19=. if ATTITYD19==9
replace ATTITYD20=. if ATTITYD20==9
gen altruism=(ATTITYD16-1) + (ATTITYD18-1) + (ATTITYD19-1)/6 + (ATTITYD20-1)/5



* Risk preference from the SALTY survey, from STR: the sum of MORAL1 and
* MORAL2 after answer code 99 has been set to missing
foreach item in MORAL1 MORAL2 {
	getdata "D:\STR_RiskScores\Stata_new\STRSalty\slty_map.dta" `item'
	replace `item' = . if `item' == 99
}
generate risk_preference = MORAL1 + MORAL2



* Utilitarianism from the SALTY survey, from STR: MORAL31-MORAL33 are kept
* only where the answer is at most 2 (anything larger becomes missing), then
* summed and shifted down by 3
foreach item in 31 32 33 {
	getdata "D:\STR_RiskScores\Stata_new\STRSalty\slty_map.dta" MORAL`item'
	replace MORAL`item' = . if MORAL`item' > 2
}
generate utilitarian = MORAL31+MORAL32+MORAL33-3



* Antisocial attitudes from the SALTY survey, from STR: the four MORAL34
* items with answer code 9 set to missing, summed, shifted down by 4, divided
* by 16 and reversed (1 minus the result)
forvalues item = 1/4 {
	getdata "D:\STR_RiskScores\Stata_new\STRSalty\slty_map.dta" MORAL34_`item'
	replace MORAL34_`item' = . if MORAL34_`item' == 9
}
generate antisocial = 1-((MORAL34_1+MORAL34_2+MORAL34_3+MORAL34_4-4)/16)



* Trust from the SALTY survey, from STR: the sum of ATTITYD2 and ATTITYD3
* after answer code 99 has been set to missing
foreach item in ATTITYD2 ATTITYD3 {
	getdata "D:\STR_RiskScores\Stata_new\STRSalty\slty_map.dta" `item'
	replace `item' = . if `item' == 99
}
generate trust = ATTITYD2 + ATTITYD3



* Level of education for parents, from the LISA databases, from Statistics
* Sweden. The same two steps run for the father and then for the mother:
*   1. translate the Sun2000niva code from LISA 1990 into years of schooling
*   2. where step 1 gave nothing, fall back on UtbNiva from the 1970 census
* The result is father_education / mother_education.
foreach parent in father mother {

	local edu `parent'_education

	* Step 1: level code from LISA 1990
	local sun `parent'_Sun2000niva
	`parent'data "D:\STR_RiskScores\Stata_new\LISA\LISA_1990" Sun2000niva
	generate `edu' = .
	replace `edu' = 7 if `sun' < 200
	replace `edu' = 9 if inrange(`sun', 200, 299)
	* Code 204 overrides the general 200-299 rule set on the line above
	replace `edu' = 9.5 if `sun' == 204
	replace `edu' = 10 if inrange(`sun', 310, 319)
	replace `edu' = 11 if inrange(`sun', 320, 329)
	replace `edu' = 12 if inrange(`sun', 330, 339)
	replace `edu' = 13 if inrange(`sun', 410, 419)
	replace `edu' = 14 if inrange(`sun', 520, 529)
	replace `edu' = 15 if inrange(`sun', 530, 539)
	replace `edu' = 16 if inrange(`sun', 540, 549)
	replace `edu' = 17 if inrange(`sun', 550, 559)
	replace `edu' = 18 if inrange(`sun', 600, 629)
	replace `edu' = 19 if inrange(`sun', 640, 649)
	drop `sun'

	* Step 2: level code from the 1970 census, used only as a fallback
	local census `parent'_UtbNiva
	`parent'data "D:\STR_RiskScores\Stata_new\FoB\FoB70" UtbNiva
	tempvar census_years
	generate `census_years' = 7 if `census' == 1
	replace `census_years' = 9 if `census' == 2
	replace `census_years' = 11 if `census' == 3
	replace `census_years' = 12 if `census' == 4
	replace `census_years' = 15 if `census' == 5
	replace `census_years' = 17 if `census' == 6
	replace `census_years' = 19 if `census' == 7
	replace `edu' = `census_years' if `edu' == .
	drop `census' `census_years'
}

* Define highest parental education
* max() ignores missing arguments and returns missing only when both are
* missing. A comparison such as father>mother is false when the mother's
* value is missing (missing counts as larger than any number), which would
* throw away the father's education for twins whose mother has none on record.
gen parents_education=max(mother_education, father_education)


* Parental income 1970, from the 1970 census database, from Statistics Sweden
fatherdata D:\STR_RiskScores\Stata_new\FoB\FoB70.dta ArbInk
rename father_ArbInk father_income70
motherdata D:\STR_RiskScores\Stata_new\FoB\FoB70.dta ArbInk
rename mother_ArbInk mother_income70

* Higher of the two parental incomes. max() ignores missing arguments, so a
* twin with only the father's income on record keeps it; a father>mother
* comparison would be false there because missing counts as larger than any
* number.
gen parents_income=max(mother_income70, father_income70)




* Level of education 1991-2010 from the LISA databases, from Statistics Sweden
* For every year the Sun2000niva code of that year's LISA file is translated
* into years of schooling (education_years<year>_); education_years is the
* largest value across all years.
forvalues i=1991(1)2010 {
	* Read the file of year `i' (not one fixed year) so that each yearly
	* variable really reflects its own year
	getdata D:\STR_RiskScores\Stata_new\LISA\LISA_`i' Sun2000niva
	gen education_years`i'_=.
	
	replace education_years`i'_=7 if Sun2000niva<200
	replace education_years`i'_=9 if inrange(Sun2000niva,200,299)
	* Code 204 overrides the general 200-299 rule set on the line above
	replace education_years`i'_=9.5 if Sun2000niva==204
	replace education_years`i'_=10 if inrange(Sun2000niva,310,319)
	replace education_years`i'_=11 if inrange(Sun2000niva,320,329)
	replace education_years`i'_=12 if inrange(Sun2000niva,330,339)
	replace education_years`i'_=13 if inrange(Sun2000niva,410,419)
	replace education_years`i'_=14 if inrange(Sun2000niva,520,529)
	replace education_years`i'_=15 if inrange(Sun2000niva,530,539)
	replace education_years`i'_=16 if inrange(Sun2000niva,540,549)
	replace education_years`i'_=17 if inrange(Sun2000niva,550,559)
	replace education_years`i'_=18 if inrange(Sun2000niva,600,629)
	replace education_years`i'_=19 if inrange(Sun2000niva,640,649)
	
	* Remove the code so the next year's fetch starts clean
	drop Sun2000niva
}
* rowmax ignores missing years; the result is missing only if all years are
egen education_years = rowmax(education_years1991_-education_years2010_)




* Parents birth years, from the multigeneration register, from Statistics Sweden
* FodArMan is divided by 100 and rounded down to leave the year. Each
* parent's year is taken from that parent's own variable.
motherdata D:\STR_RiskScores\Stata_new\FlerGen\FodelseUppg.dta FodArMan
fatherdata D:\STR_RiskScores\Stata_new\FlerGen\FodelseUppg.dta FodArMan
gen mother_BirthYear = floor(mother_FodArMan/100)
gen father_BirthYear = floor(father_FodArMan/100)




* IQ data from conscription sources
* Three source files are stacked; each contributes one or more candidate
* scores (iq1-iq5) and IQ is the largest candidate available on a row. The
* twin data are parked in temp.dta while the score file is built.
save temp.dta, replace

* Source 1: mean of four subtest scores; values outside 1-9 are treated as
* missing
use "D:\STR_RiskScores\Stata_new\Monstring\insark.dta", clear
forvalues i=1(1)4 {
	destring stp`i', replace force
	replace stp`i'=. if stp`i'<1|stp`i'>9
}
gen iq1 = (stp1+stp2+stp3+stp4)/4
keep LopNr iq*

* Source 2: mean of four or five standard scores (A-D, plus E when present),
* or of stpi1-stpi4 where 0 is treated as missing
append using "D:\STR_RiskScores\Stata_new\Monstring\lev_so__miq_data.dta"
forvalues i=1(1)4 {
	replace stpi`i'=. if stpi`i'==0
}
gen iq2 = (A_standp +B_standp +C_standp +D_standp)/4 if A_standp!=. & E_standp==.
gen iq3 = (A_standp +B_standp +C_standp +D_standp +E_standp)/5 if E_standp!=.
gen iq4 = (stpi1+stpi2+stpi3+stpi4)/4
keep LopNr iq*

* Source 3: gkap, with 0 treated as missing. The test has to be on iq5
* itself; rows from this file have no iq3, so testing iq3 would never blank
* a zero score.
append using "D:\STR_RiskScores\Stata_new\Monstring\rekryteringsmyndigheten.dta"
gen iq5=gkap
replace iq5=. if iq5==0

* One score per person: rows without any score are dropped, then one row per
* LopNr is kept (which one is not controlled when a person has several)
egen IQ=rowmax(iq*)
keep LopNr IQ
keep if IQ!=.
bysort LopNr: gen n=_n
keep if n==1
drop n
save iqdata.dta, replace

* Back to the twin data and attach the score
use temp.dta, clear
getdata iqdata.dta IQ



* Get twin contact rate (for robustness checks) from the SALT survey, from STR
* contact is the number of contacts per year, built from the weekly, monthly
* and yearly answers. Later lines overwrite earlier ones, so a yearly answer
* wins over a monthly one, which wins over a weekly one; "never" sets it to 0.
save temp.dta, replace
use "D:\STR_RiskScores\Stata_new\STRSalt\salt_twinc.dta", clear
destring KONTAKT_ANTAL_GANG_PER_VECKA_V, force replace
destring KONTAKT_ANTAL_GANG_PER_MANAD_V, force replace
destring KONTAKT_ANTAL_GANG_AR_V, force replace
gen kontakt_per_ar=.
* Cap weekly contacts at 7. Missing counts as larger than any number, so it
* is excluded explicitly; otherwise every twin without a weekly answer would
* be recorded as 7 contacts per week.
replace KONTAKT_ANTAL_GANG_PER_VECKA_V=7 if KONTAKT_ANTAL_GANG_PER_VECKA_V>7 & !missing(KONTAKT_ANTAL_GANG_PER_VECKA_V)
replace kontakt_per_ar=KONTAKT_ANTAL_GANG_PER_VECKA_V*52 if KONTAKT_ANTAL_GANG_PER_VECKA_V!=.
replace kontakt_per_ar=KONTAKT_ANTAL_GANG_PER_MANAD_V*12 if KONTAKT_ANTAL_GANG_PER_MANAD_V!=.
replace kontakt_per_ar=KONTAKT_ANTAL_GANG_AR_V if KONTAKT_ANTAL_GANG_AR_V!=.
replace kontakt_per_ar=0 if KONTAKT_HOW_OFTEN_ALDRIG==5
keep LopNr kontakt_per_ar
* Attach to the parked twin data. Survey rows without a twin record stay in
* memory here with no zyg value.
merge 1:1 LopNr using temp.dta, nogen
rename kontakt_per_ar contact


* Finally, keep only MZ twins and save
* zyg is 1 for pairs with a relatedness coefficient of 1. Rows with no zyg
* value are removed by the same filter. The scratch file temp.dta is deleted
* once the final dataset is written.
keep if zyg==1
save "C:\Userdata\Shared\Dofiles\PrelDoFiles\PrelRafael\Quantifying bias\dataset.dta", replace
erase temp.dta

